package cn.minimelon.solon.service.analyze.impl;

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.IdUtil;
import cn.hutool.core.util.StrUtil;
import cn.minimelon.solon.constants.ESConstants;
import cn.minimelon.solon.domain.analyze.*;
import cn.minimelon.solon.domain.system.BaseCompanyInfo;
import cn.minimelon.solon.domain.system.CompanyInfoVO;
import cn.minimelon.solon.domain.system.ESQueryVO;
import cn.minimelon.solon.domain.system.EsToken;
import cn.minimelon.solon.mapper.analyze.CompanyGroupCrossMapper;
import cn.minimelon.solon.mapper.analyze.CompanyGroupMaxMapper;
import cn.minimelon.solon.mapper.analyze.CompanyManualMapper;
import cn.minimelon.solon.mapper.analyze.CompanyWordsMapper;
import cn.minimelon.solon.mapper.system.CompanyInfoMapper;
import cn.minimelon.solon.service.analyze.CompanyAnalyzeService;
import cn.minimelon.solon.service.analyze.SimilarityService;
import cn.minimelon.solon.utils.ElasticFactory;
import lombok.extern.slf4j.Slf4j;
import org.beetl.sql.core.page.PageResult;
import org.beetl.sql.core.query.LambdaQuery;
import org.beetl.sql.solon.annotation.Db;
import org.noear.esearchx.EsCommand;
import org.noear.esearchx.EsGlobal;
import org.noear.esearchx.EsQuery;
import org.noear.esearchx.PriWw;
import org.noear.esearchx.model.EsData;
import org.noear.snack.ONode;
import org.noear.solon.Solon;
import org.noear.solon.annotation.Inject;
import org.noear.solon.annotation.Component;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.stream.Collectors;

@Slf4j
@Component
public class CompanyAnalyzeServiceImpl implements CompanyAnalyzeService {

    /** 分批处理时每批读取的记录条数 */
    private static final int BATCH_SIZE = 10000;

    /** 余弦相似度入组阈值（百分制），大于该值才归入同一分组 */
    private static final double SIMILARITY_THRESHOLD = 78.8;

    @Inject
    private ExecutorService executorService;

    @Inject
    private SimilarityService similarityService;

    @Db("master")
    private CompanyInfoMapper companyInfoMapper;

    @Db("writer")
    private CompanyWordsMapper companyWordsMapper;

    @Db("writer")
    private CompanyManualMapper companyManualMapper;

    @Db("writer")
    private CompanyGroupMaxMapper companyGroupMaxMapper;

    @Db("writer")
    private CompanyGroupCrossMapper companyGroupCrossMapper;

    /**
     * 初始化 ES 数据：分批扫描公司基础信息，逐条 upsert 到 ES 索引，
     * 并对公司名称做 IK 分词后写入分词表/手工表。
     * 扫描任务提交到线程池异步执行，本方法立即返回待处理总数。
     *
     * @return 待处理的公司总数（kind &gt; 1 的记录数）
     */
    @Override
    public Long initEsData() {
        LambdaQuery<BaseCompanyInfo> queryAll = companyInfoMapper.createLambdaQuery()
                // 医院不需要分析
                .andGreat("kind", 1);
        long count = queryAll.count();
        // 按每批 BATCH_SIZE 条向上取整得到批次数
        int pageCount = (int) Math.ceil(count / (double) BATCH_SIZE);
        Runnable runnable = () -> {
            long startTime = System.currentTimeMillis();
            // 键集分页游标：上一批最后一条记录的 id
            String maxId = null;
            for (int i = 0; i < pageCount; i++) {
                LambdaQuery<BaseCompanyInfo> query = companyInfoMapper.createLambdaQuery()
                        // 医院不需要分析
                        .andGreat("kind", 1)
                        // 按 id 升序排序，与下方 id > maxId 的游标条件保持一致
                        .orderBy(BaseCompanyInfo::getId)
                        .limit(1, BATCH_SIZE);
                if (i > 0 && maxId != null) {
                    query.andGreat("id", maxId);
                }
                List<BaseCompanyInfo> list = query.select();
                if (CollUtil.isEmpty(list)) {
                    continue;
                }
                // 记录本批最后一条记录的 id，作为下一批的游标
                maxId = list.get(list.size() - 1).getId();
                for (BaseCompanyInfo info : list) {
                    try {
                        ElasticFactory.getInstance().indice(ESConstants.ES_IDX_COMP).upsert(info.getId(), info);
                    } catch (Exception e) {
                        // 修复：原实现只打印固定文案，丢失异常堆栈；此处记录完整异常便于排查
                        log.error("ES upsert 失败, id: {}", info.getId(), e);
                    }
                    ESQueryVO esQuery = new ESQueryVO();
                    esQuery.setAnalyzer(ESConstants.IK_SMART);
                    esQuery.setText(info.getCname());
                    List<EsToken> tokenList = analyzeWord(esQuery);
                    // 分拆，并插入数据库
                    insertResult(info, tokenList);
                }
            }
            log.info("initEsData 耗时: {}s", (System.currentTimeMillis() - startTime) / 1000);
        };
        executorService.submit(runnable);
        return count;
    }

    /**
     * 相似度分组：清空历史分组与映射后，按词组个数倒序分批扫描分词表，
     * 对每条记录做 ES 全文检索并以余弦相似度建组。异步执行，立即返回总数。
     *
     * @return 待分组的分词记录总数
     */
    @Override
    public Long groupData() {
        long count = companyWordsMapper.createLambdaQuery()
                // NOTE(review): "%3M%" 过滤疑似调试遗留（仅处理名称含 3M 的公司），上线前请确认是否应移除
                .andLike(CompanyWords::getOriginName, "%3M%")
                .count();
        int pageCount = (int) Math.ceil(count / (double) BATCH_SIZE);
        Runnable runnable = () -> {
            try {
                if (Solon.cfg().isDebugMode()) {
                    // NOTE(review): 每次执行都会追加一个监听器，重复调用会重复打印 DSL — 建议确认是否需要只注册一次
                    EsGlobal.onCommandBefore(cmd -> log.info("dsl:{}", cmd.getDsl()));
                }
                // 重建分组：先清空历史分组表与映射表
                companyGroupMaxMapper.createLambdaQuery().delete();
                companyGroupCrossMapper.createLambdaQuery().delete();
                long startTime = System.currentTimeMillis();
                for (int i = 0; i < pageCount; i++) {
                    // 修复：原实现按 workSize 倒序排序却用 id > maxId 做游标分页，
                    // 排序键与游标键不一致会导致漏读/重复读；改用偏移量分页，
                    // 同时保留"词组个数多者优先作为分组标准名"的排序意图
                    List<CompanyWords> list = companyWordsMapper.createLambdaQuery()
                            // 按最大词组个数倒序优先匹配最多词组
                            .desc(CompanyWords::getWorkSize)
                            .andLike(CompanyWords::getOriginName, "%3M%")
                            .limit(i * BATCH_SIZE + 1, BATCH_SIZE)
                            .select();
                    if (CollUtil.isEmpty(list)) {
                        continue;
                    }
                    iteratorCompany(list);
                }
                log.info("groupData 耗时: {}s", (System.currentTimeMillis() - startTime) / 1000);
            } catch (Exception ex) {
                // 修复：原实现使用 printStackTrace，改为通过日志框架记录完整异常
                log.error("groupData 执行失败", ex);
            }
        };
        executorService.submit(runnable);
        return count;
    }

    /**
     * 分页查询分组表，支持按标准名模糊过滤。
     *
     * @param query 查询条件（stdName 为空时查全部）
     * @return 分页结果
     */
    @Override
    public PageResult<CompanyGroupMax> queryPage(CompanyGroupQuery query) {
        LambdaQuery<CompanyGroupMax> lambdaQuery = companyGroupMaxMapper.createLambdaQuery();
        if (StrUtil.isNotEmpty(query.getStdName())) {
            lambdaQuery.andLike(CompanyGroupMax::getStdName, "%" + StrUtil.trim(query.getStdName()) + "%");
        }
        return lambdaQuery.page(query.getPage(), query.getLimit());
    }

    /**
     * 查询某分组下的映射记录，按相似度分数倒序返回。
     *
     * @param query 查询条件（groupId 为空时查全部）
     * @return 映射记录列表
     */
    @Override
    public List<CompanyGroupCross> queryCross(CompanyGroupQuery query) {
        LambdaQuery<CompanyGroupCross> lambdaQuery = companyGroupCrossMapper.createLambdaQuery()
                .desc(CompanyGroupCross::getScore);
        if (StrUtil.isNotEmpty(query.getGroupId())) {
            lambdaQuery.andEq(CompanyGroupCross::getGroupId, query.getGroupId());
        }
        return lambdaQuery.select();
    }

    /**
     * 遍历一批分词记录：跳过已归组的公司，其余通过 ES 全文检索召回候选后计算相似度分组。
     *
     * @param list 待处理的分词记录
     */
    private void iteratorCompany(List<CompanyWords> list) {
        for (CompanyWords info : list) {
            // 已归入某个分组的公司不再重复处理
            long cn = companyGroupCrossMapper.createLambdaQuery()
                    .andEq(CompanyGroupCross::getOriginId, info.getOriginId())
                    .count();
            if (cn > 0) {
                continue;
            }
            // ES全文检索召回候选（哈夫曼算法分数下限 60，最多取 100 条）
            try {
                EsQuery esQuery = ElasticFactory.getInstance().indice(ESConstants.ES_IDX_COMP)
                        .where(c -> c.match("cname", info.getOriginName()))
                        // 哈夫曼算法分数
                        .minScore(60)
                        .limit(100);
                EsData<CompanyInfoVO> result = esQuery.selectList(CompanyInfoVO.class);
                List<CompanyInfoVO> likeList = result.getList();
                // 插入分组，插入映射
                if (CollUtil.isNotEmpty(likeList)) {
                    algorithmSimilarity(info, likeList);
                }
            } catch (Exception ex) {
                // 修复：原实现 printStackTrace + 仅打印名称，改为记录完整异常
                log.error("ES 检索/分组失败, 名称: {}", info.getOriginName(), ex);
            }
        }
    }

    /**
     * 计算相似度分数：余弦定理
     * 映射分数小于最新分数，使用最新分组处理
     *
     * @param info     最大分词词组
     * @param likeList 相似数据ES
     */
    private void algorithmSimilarity(CompanyWords info, List<CompanyInfoVO> likeList) {
        String groupId = snowId("GR");
        List<CompanyGroupCross> crossList = new ArrayList<>();
        for (CompanyInfoVO likeItem : likeList) {
            // 先查重再算分：已归组的候选直接跳过，避免无谓的相似度计算（原实现先算分后查重）
            CompanyGroupCross old = companyGroupCrossMapper.createLambdaQuery()
                    .andEq(CompanyGroupCross::getOriginId, likeItem.getId())
                    .single();
            if (old != null) {
                continue;
            }
            double score = similarityService.calculateScore(info.getOriginName(), likeItem.getCname()) * 100.0;
            // 余弦定理分数超过阈值才入组
            if (score > SIMILARITY_THRESHOLD) {
                CompanyGroupCross cross = new CompanyGroupCross();
                cross.setId(snowId("CR"));
                cross.setGroupId(groupId);
                cross.setOriginId(likeItem.getId());
                cross.setOriginName(likeItem.getCname());
                cross.setScore(score);
                log.info("group:{} _score:{} score:{} cross:{} ", groupId, likeItem.get_score(), cross.getScore(), cross.getOriginName());
                crossList.add(cross);
            }
        }
        if (CollUtil.isNotEmpty(crossList)) {
            companyGroupCrossMapper.insertBatch(crossList);
            // 以当前记录作为该分组的标准名
            CompanyGroupMax groupMax = new CompanyGroupMax();
            groupMax.setId(groupId);
            groupMax.setGroupWords(info.getWordList());
            groupMax.setGroupCount(crossList.size());
            groupMax.setStdName(info.getOriginName());
            companyGroupMaxMapper.insert(groupMax);
        }
    }

    /**
     * 根据分词结果落库：有效词（中文词/英文词）不足 2 个的进手工表，否则进分词表。
     *
     * @param info      公司基础信息
     * @param tokenList 分词结果（可能为空）
     */
    private void insertResult(BaseCompanyInfo info, List<EsToken> tokenList) {
        log.info("分词总数 {}", tokenList.size());
        // 有效词个数：只统计中文词与英文词
        long keySize = tokenList.stream()
                .filter(item -> isSupportWordType(item.getType()))
                .count();
        // size < 2 已涵盖空列表，无需额外判空（原实现条件冗余）
        if (tokenList.size() < 2 || keySize < 2) {
            CompanyManual item = new CompanyManual();
            item.setId(info.getId());
            item.setOriginId(info.getId());
            item.setOriginName(info.getCname());
            item.setRegCode(info.getRegCode());
            item.setCompKind(info.getKind());
            log.info("手工 {}", item);
            companyManualMapper.insert(item);
        } else {
            // 走到此分支 tokenList 必然非空且 size >= 2
            CompanyWords item = new CompanyWords();
            item.setId(info.getId());
            item.setOriginId(info.getId());
            item.setOriginName(info.getCname());
            List<String> wordList = tokenList.stream().map(EsToken::getToken).collect(Collectors.toList());
            item.setWordList(StrUtil.join(" ", wordList));
            item.setWorkSize(wordList.size());
            item.setCompKind(info.getKind());
            log.info("分词 {}", item);
            companyWordsMapper.insert(item);
        }
    }

    /**
     * 判断分词类型是否为有效词（中文词或英文词）。
     *
     * @param type 分词类型
     * @return 是否有效
     */
    private boolean isSupportWordType(String type) {
        return ESConstants.CN_WORD.equals(type) || ESConstants.ENGLISH.equals(type);
    }

    /**
     * 调用 ES 的 _analyze 接口对文本做分词。
     * 调用失败时记录异常并返回空列表（不向上抛出）。
     *
     * @param query 分词请求（分词器 + 文本）
     * @return 分词结果，失败时为空列表
     */
    private List<EsToken> analyzeWord(ESQueryVO query) {
        try {
            String bodyJson = ONode.stringify(query);
            String json = ElasticFactory.getInstance().execAsBody(buildAnalyzeCmd(bodyJson));
            ONode tokens = ONode.loadStr(json).get("tokens");
            return tokens.toObjectList(EsToken.class);
        } catch (Exception ex) {
            // 修复：原实现仅打印 getMessage()，丢失堆栈；改为记录完整异常
            log.error("ES 分词失败", ex);
        }
        return new ArrayList<>();
    }

    /**
     * 构建 ES _analyze 接口的 POST 命令。
     *
     * @param bodyJson 请求体 JSON
     * @return ES 命令对象
     */
    private EsCommand buildAnalyzeCmd(String bodyJson) {
        EsCommand cmd = new EsCommand();
        cmd.method = PriWw.method_post;
        cmd.dslType = PriWw.mime_json;
        cmd.dsl = bodyJson;
        cmd.path = ESConstants.ANALYZE;
        return cmd;
    }

    /**
     * 生成带前缀的雪花 ID。
     *
     * @param prefix ID 前缀（会被 trim）
     * @return 前缀 + 雪花数字 ID
     */
    private static String snowId(String prefix) {
        long id = IdUtil.getSnowflake().nextId();
        return StrUtil.trim(prefix) + id;
    }
}
